//go through all the models and output MAE and RMSE

clear all
set more off


**** GLOBALS *****;
global 	InputFile ${dat}\HSV_repdata.dta
global 	OutputDir ${res}
global straps  500
*********************;


use ${InputFile} , clear



//levels of pre and postgov inc
gen pregovinc = max(redpregovinc - tot_deduction + 0.5*fica,0)
gen dispinc = max(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1,0)

//logs of pre and postgov inc
replace lpregovinc = log(redpregovinc - tot_deduction + 0.5*fica)
replace ldispinc = log(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1)


forvalues samps=1/1 {
	forvalues t=2000(2)2006	{


		preserve
		keep if year==`t'
		

		// estimate the model by HSV and compute RMSE, MAE, and plot 
		bootstrap , reps($straps): reg ldispinc lpregovinc
		est sto log_mod
		gen indi=e(sample)
		keep if indi==1
		predict yhatt, xb
		predict ress, r
		sum ress
		gen yhat_log=exp(yhatt+((r(sd))^2)/2)
		
		
		// calculate RMSE
		gen help=(dispinc-yhat_log)^2
		sum help if indi
		scalar rmse_log_s`samps'=sqrt(r(mean))
		drop help
		
		// calculate MAE
		gen help=abs(dispinc-yhat_log)
		sum help if indi
		scalar mae_log_s`samps'=r(mean)
		drop help

		// estimate the model using NLS and compute RMSE, MAE, and plot
		bootstrap , reps($straps): poisson dispinc lpregovinc
		est sto nl_mod
		predict yhat_nl, ir
		replace yhat_nl=. if indi!=1
		
		// calculate RMSE
		gen help=(dispinc-yhat_nl)^2
		sum help if indi
		scalar rmse_nl_s`samps'=sqrt(r(mean))
		drop help
		
		// calculate MAE
		gen help=abs(dispinc-yhat_nl)
		sum help if indi
		scalar mae_nl_s`samps'=r(mean)
		drop help
		
		//produce figure comparing absolute errors
		gen err_log=abs(dispinc-yhat_log) if indi
		gen err_nl=abs(dispinc-yhat_nl) if indi
		tw scatter err_log pregovinc , msymbol(Dh) msize(tiny) || scatter err_nl pregovinc  , msymbol(O) msize(tiny) scheme(s1mono) legend(off) xtitle(Gross Income) xlabel(,labsize(small)) ylabel(,labsize(small))
		graph export ${OutputDir}\abs_err_comp_Y`t'_s`samps'.png, replace
		
		//produce figure that gives reduction of weight
		gen err_rel=(dispinc-yhat_log)^2/(dispinc-yhat_nl)^2 if indi
		qui sum err_rel
		replace err_rel=err_rel/r(max) if indi
		tw scatter err_rel pregovinc , msymbol(Dh) msize(tiny) scheme(s1mono) legend(off) xtitle(Gross Income) ytitle(Relative Weight) 
		graph export ${OutputDir}\err_w_comp_Y`t'_s`samps'.png, replace
		
		
		//export RMSE and MAE
		mat A=[rmse_log_s`samps',mae_log_s`samps',rmse_nl_s`samps',mae_nl_s`samps']
		putexcel set "${OutputDir}\GOFs_Y`t'_s`samps'", sheet(1) replace
		putexcel A1=matrix(A)
		
		//export the estimates
		esttab log_mod nl_mod using "${OutputDir}\nl_log_estimates_Y`t'_s`samps'.tex", star(* 0.10 ** 0.05 *** 0.01) se replace
		restore

	}
}
